//go through all the models and output MAE and RMSE

clear all
set more off

**** GLOBALS *****;
global 	InputFile ${dat}\HSV_repdata.dta
global 	OutputDir ${res}
global straps  500
*********************;


use ${InputFile} , clear



//levels of pre and postgov inc
gen pregovinc = max(redpregovinc - tot_deduction + 0.5*fica,0)
gen dispinc = max(reddispinc - tot_deduction + 0.5*fica + hwdpvpengain1,0)

//logs of pre and postgov inc
replace lpregovinc = log(redpregovinc - tot_deduction + 0.5*fica)
replace ldispinc = log(reddispinc - tot_deduction + 0.5*fica + hwdpvpengain1)

	
forvalues samps=1/1 {	

 	
	// estimate the model by HSV and compute RMSE, MAE, and plot 
	bootstrap , reps($straps): reg ldispinc lpregovinc
	est sto log_mod
	gen indi=e(sample)
	keep if indi==1
	predict yhatt
	predict ress, r
	sum ress
	gen yhat_log=exp(yhatt+((r(sd))^2)/2)
	
	
	// calculate RMSE
	gen help=(dispinc-yhat_log)^2
	sum help if indi
	scalar rmse_log_s`samps'=sqrt(r(mean))
	drop help
	
	// calculate MAE
	gen help=abs(dispinc-yhat_log)
	sum help if indi
	scalar mae_log_s`samps'=r(mean)
	drop help

	// estimate the model using NLS and compute RMSE, MAE, and plot
	bootstrap , reps($straps): poisson dispinc lpregovinc
	est sto nl_mod
	predict yhat_nl, ir
	replace yhat_nl=. if indi!=1
	
	// calculate RMSE
	gen help=(dispinc-yhat_nl)^2
	sum help if indi
	scalar rmse_nl_s`samps'=sqrt(r(mean))
	drop help
	
	// calculate MAE
	gen help=abs(dispinc-yhat_nl)
	sum help if indi
	scalar mae_nl_s`samps'=r(mean)
	drop help
	
	//produce figure comparing absolute errors
	gen err_log=abs(dispinc-yhat_log) if indi
	gen err_nl=abs(dispinc-yhat_nl) if indi
	tw scatter err_log pregovinc , msymbol(Dh) msize(small) || scatter err_nl pregovinc  , msymbol(Oh) msize(small) scheme(s1mono) legend(order(1 "Log OLS" 2 "PPML") ring(0) position(5) col(1)) ytitle(Absolute Errors) xtitle(Gross Income) xlabel(,labsize(small)) ylabel(,labsize(small))
	graph export ${OutputDir}\abs_err_comp_s`samps'.png, replace
	
	tw scatter err_log pregovinc if pregovinc<2000000, msymbol(Dh) msize(small) || scatter err_nl pregovinc if pregovinc<2000000 , msymbol(Oh) msize(small) scheme(s1mono) legend(order(1 "Log OLS" 2 "PPML") ring(0) position(5) col(1)) ytitle(Absolute Errors) xtitle(Gross Income) xlabel(,labsize(small)) ylabel(,labsize(small))
	graph export ${OutputDir}\abs_err_compl1k_s`samps'.png, replace
	 
	//produce figure that gives reduction of weight
	gen err_rel=(dispinc-yhat_log)^2/(dispinc-yhat_nl)^2 if indi
	tw scatter err_rel pregovinc , xlabel(,labsize(small)) ylabel(,labsize(small)) msymbol(Dh) msize(tiny) scheme(s1mono) legend(off) xtitle(Gross Income) ytitle(" ")
	graph export ${OutputDir}\err_w_comp_s`samps'.png, replace
	
	tw scatter err_rel pregovinc if err_rel<100000, xlabel(,labsize(small)) ylabel(,labsize(small)) msymbol(Dh) msize(tiny) scheme(s1mono) legend(off) xtitle(Gross Income) ytitle(" ") 
	graph export ${OutputDir}\err_w_comp_s`samps'_l100k.png, replace
	
	
	//export RMSE and MAE
	mat A=[rmse_log_s`samps',mae_log_s`samps',rmse_nl_s`samps',mae_nl_s`samps']
	putexcel set "${OutputDir}\GOFs_s`samps'", sheet(1) replace
	putexcel A1=matrix(A)
	
	//export the estimates
	esttab log_mod nl_mod using "${OutputDir}\nl_log_estimates_s`samps'.tex", star(* 0.10 ** 0.05 *** 0.01) se replace
	

}
